from bs4 import BeautifulSoup
import scrapy
from ..items import ItblogItem


class CnblogsSpider(scrapy.Spider):
    """Spider that scrapes post titles and image URLs from the cnblogs home page.

    For every post entry found under ``#post_list`` it emits an ``ItblogItem``
    with a ``title`` and a one-element ``image_urls`` list.
    """

    name = "cnblogs"
    allowed_domains = ["www.cnblogs.com"]
    start_urls = ["https://www.cnblogs.com"]

    def parse(self, response):
        """Yield one ``ItblogItem`` per post entry that has a title and an image.

        Args:
            response: The downloaded page; its ``text`` is parsed with
                BeautifulSoup using the ``lxml`` parser.

        Yields:
            ItblogItem: With ``title`` set to the text of the post's title
            link and ``image_urls`` set to a single-element list holding the
            absolute URL of the post's ``img.avatar`` image.

        Entries lacking a title link, an ``img.avatar`` tag, or a non-empty
        ``src`` attribute are skipped rather than aborting the whole page.
        """
        bs = BeautifulSoup(response.text, "lxml")

        sections = bs.select("#post_list article.post-item section.post-item-body")
        for section in sections:
            # Title link of the post.
            title_link = section.select_one("div.post-item-text a.post-item-title")
            # Image shown inside the post summary.
            img = section.select_one("div.post-item-text p.post-item-summary img.avatar")

            # select_one() returns None when nothing matches; check explicitly
            # instead of relying on an exception raised by None.text / None[...].
            if title_link is None or img is None:
                self.logger.debug("Skipping post entry without title link or image")
                continue

            # Tag.get() returns None for a missing attribute instead of raising.
            src = img.get("src")
            if not src:
                self.logger.debug("Skipping post entry whose image has no src")
                continue

            item = ItblogItem()
            item["title"] = title_link.text
            # Resolve relative / protocol-relative URLs against the page URL;
            # already-absolute URLs pass through unchanged.
            # NOTE(review): image_urls is presumably consumed by an images
            # pipeline that needs absolute URLs — confirm in settings.
            item["image_urls"] = [response.urljoin(src)]

            yield item






